# Use the ldatuning package to determine the number of topics. Full essays were
# used for the calculations. The code is shared for anyone interested in our
# approach.

library("ldatuning")
library("topicmodels")
library("tm")
library("egg")

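# The code below finds a range of plausible values for K (the number of
# topics). `samp` is a data frame that has the text in the column named
# `Twelve`. NOTE(review): the commented-out code below builds its corpus from
# `processed_docs` rather than `samp`; confirm which object holds the input.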

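# NOTE(review): ldatuning::FindTopicsNumber() documents its first argument as a
# DocumentTermMatrix (documents in rows); a TermDocumentMatrix is the transpose,
# so tm::DocumentTermMatrix() is probably what was intended here -- confirm.
#corp_sat <- Corpus(VectorSource(processed_docs))
#corp_sat <- TermDocumentMatrix(corp_sat)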

#result_sat <- FindTopicsNumber(corp_sat, topics = seq(from = 10, to = 150, by = 10),
#                  metrics = c("Griffiths2004", "CaoJuan2009", "Arun2010", "Deveaud2014"),
#                  method = "Gibbs",
#                  control = list(seed = 77),
#                  verbose = TRUE,
#                  mc.cores = 10L)

#FindTopicsNumber_plot(result_sat)

#cao <- scales::rescale(result_sat$CaoJuan2009, to = c(0,1))
#grif <- scales::rescale(result_sat$Griffiths2004, to = c(0,1))
#arun <- scales::rescale(result_sat$Arun2010, to = c(0,1))
#dev <- scales::rescale(result_sat$Deveaud2014, to = c(0,1))

#result_scaled <- as.data.frame(cbind(result_sat$topics, cao, grif, arun, dev))
#names(result_scaled)[1] <- "topics"
#write.csv(result_scaled, "ldatuning_creative_scaled.csv")

#result_melt <- reshape2::melt(result_scaled,id.vars = "topics")

#result_max <- result_melt %>%
#  filter(variable == "grif" |variable == "dev")

#result_min <- result_melt %>%
#  filter(variable == "cao" |variable == "arun")


# max <- ggplot(result_max, aes(x=factor(topics),y=value, group=variable)) +
#  ggtitle("Maximize (Merged Essay)") +
#  theme_minimal()+
#  theme(plot.title = element_text(size = 18),
#        panel.border = element_rect(colour = "black", size = 2, fill = NA),
#        legend.title = element_text(size = 16),
#        legend.text = element_text(size = 14),
#        axis.title.x = element_text(size = 14),
#        axis.title.y = element_text(size = 14),
#        axis.text.x = element_text(size = 12),
#        axis.text.y = element_text(size = 12)) + 
#  geom_line(aes(color=variable))+
#  geom_point(aes(color=variable), size = 3)+ 
#  labs(x = "Topics", y = "Scaled Values", color = "Method") +
#  scale_color_manual(labels = c("Griffiths & Steyvers, 2004 [54]",
#                                "Deveaud et al., 2014 [56]"), 
#                     values = c("#F8766D","#00BFC4")) +
#  scale_shape_manual(values=c(16, 17))+
#  coord_fixed(5)

#min <- ggplot(result_min, aes(x=factor(topics),y=value, group=variable)) +
#  ggtitle("Minimize (Merged Essay)") +
#  theme_minimal()+
#  theme(plot.title = element_text(size = 18),
#        panel.border = element_rect(colour = "black", size = 2, fill = NA),
#        legend.title = element_text(size = 16),
#        legend.text = element_text(size = 14),
#        axis.title.x = element_text(size = 14),
#        axis.title.y = element_text(size = 14),
#        axis.text.x = element_text(size = 12),
#        axis.text.y = element_text(size = 12)) + 
#  geom_line(aes(color=variable))+
#  geom_point(aes(color=variable), size = 3)+ 
#  labs(x = "Topics", y = "Scaled Values", color = "Method") +
#  scale_color_manual(labels = c("Cao et al., 2009 [57]",
#                                "Arun et al., 2010 [55]"), 
#                     values = c("red","blue")) +
#  scale_shape_manual(values=c(16, 17))+
#  coord_fixed(5)

#ggarrange(min,max,nrow = 2)